Importing Libraries
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(data.table)
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
library(haven)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.4 v stringr 1.4.0
## v tidyr 1.1.3 v forcats 0.5.1
## v readr 2.0.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x data.table::between() masks dplyr::between()
## x dplyr::filter() masks stats::filter()
## x data.table::first() masks dplyr::first()
## x dplyr::lag() masks stats::lag()
## x data.table::last() masks dplyr::last()
## x purrr::transpose() masks data.table::transpose()
library(tidyquant)
## Loading required package: lubridate
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:data.table':
##
## hour, isoweek, mday, minute, month, quarter, second, wday, week,
## yday, year
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
## Loading required package: PerformanceAnalytics
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## Attaching package: 'xts'
## The following objects are masked from 'package:data.table':
##
## first, last
## The following objects are masked from 'package:dplyr':
##
## first, last
##
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
##
## legend
## Loading required package: quantmod
## Loading required package: TTR
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## == Need to Learn tidyquant? ====================================================
## Business Science offers a 1-hour course - Learning Lab #9: Performance Analysis & Portfolio Optimization with tidyquant!
## </> Learn more at: https://university.business-science.io/p/learning-labs-pro </>
library(tidyr)
library(lubridate)
library(naniar)
library(readxl)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(scales)
##
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
##
## discard
## The following object is masked from 'package:readr':
##
## col_factor
library(fredr)
library(openxlsx)
library(fredr)
library(zoo)
library(frenchdata)
library(NMOF)
##
## Attaching package: 'NMOF'
## The following objects are masked from 'package:lubridate':
##
## duration, pm
library(datetimeutils)
##
## Attaching package: 'datetimeutils'
## The following objects are masked from 'package:lubridate':
##
## mday, mday<-, month, year
## The following objects are masked from 'package:data.table':
##
## mday, month, year
library(lme4)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
library(geckor)
## R client for the CoinGecko API
## Developed by Next Game Solutions (http://nextgamesolutions.com)
library(tibbletime)
##
## Attaching package: 'tibbletime'
## The following object is masked from 'package:stats':
##
## filter
library(zeallot)
library(sandwich)
library(lmtest)
library(broom)
library(shiny)
##
## Attaching package: 'shiny'
## The following object is masked from 'package:NMOF':
##
## NS
library(plotly)
library(pracma)
##
## Attaching package: 'pracma'
## The following objects are masked from 'package:Matrix':
##
## expm, lu, tril, triu
## The following object is masked from 'package:purrr':
##
## cross
library(tseries)
library(rugarch)
## Loading required package: parallel
##
## Attaching package: 'rugarch'
## The following object is masked from 'package:pracma':
##
## pdist
## The following object is masked from 'package:purrr':
##
## reduce
## The following object is masked from 'package:stats':
##
## sigma
# Plot every data column of `master_data_join` (all columns except the
# first, which must be DATE) as one relative/stacked bar trace each.
#
# master_data_join: data frame whose first column is DATE and whose
#                   remaining columns are the series to plot.
# datatype:         plot title.
#
# Fixes vs. original: removed unused `len`; removed the duplicated
# `xaxis` argument in layout(); `mode = 'bar'` is not a valid plotly
# mode (plotly warned and fell back to scatter) — 'bar' is a trace
# *type*, which also makes `barmode = 'relative'` take effect.
plot_data_var <- function(master_data_join, datatype) {
  plot <- master_data_join %>% plot_ly()
  # One trace per data column; column 1 (DATE) is the shared x axis.
  for (i in seq_along(master_data_join)) {
    if (i < 2) {
      next
    }
    plot <- plot %>%
      add_trace(x = ~DATE, y = master_data_join[[i]], type = "bar",
                name = colnames(master_data_join)[i])
  }
  plot %>%
    layout(title = datatype,
           barmode = "relative",
           xaxis = list(title = ""),
           margin = list(l = 75, r = 75, b = 50, t = 50, pad = 4),
           yaxis = list(side = "left",
                        title = "Portfolio Cumulative Performance",
                        showgrid = FALSE,
                        zeroline = TRUE,
                        color = "green"),
           legend = list(traceorder = "reversed", orientation = "h"))
}
Generating a sample 10-year window for analysis and sampling 250 companies to analyze
# set.seed(903559177)
# starting_year <- sample(1980:2010,1)
# print(starting_year)
#
# path = "C:\\Users\\Maharshi Vyas\\Downloads\\dsf_new.csv"
#
# daily_data <- fread(path, select=c('DATE','CUSIP', 'PRC', 'RET','RETX','VWRETD'))
#
# # This is basic cleaning: parsing the date correctly and changing price to its absolute value, as mentioned in the assignment document
# daily_data <- daily_data %>%
# tibble() %>%
# filter(DATE >= starting_year*10000, DATE <= (starting_year+11)*10000) %>%
# mutate(RET = as.numeric(RET)) %>%
# mutate(PRC = abs(PRC),
# DATE = as.Date(parse_date_time(DATE, orders = "Ymd")))
#
# set.seed(903559177)
# selected_companies <- daily_data %>%
# select(c(CUSIP)) %>%
# distinct() %>%
# sample_n(250)
#
# daily_data <- daily_data %>%
# inner_join(selected_companies)
#
# write.csv(daily_data, "D:\\Data\\daily_250_10_years.csv")
# rm(daily_data)
# Load the pre-sampled daily return files written by the (commented-out)
# preparation step above: 250 companies over a 10-year window each.
# NOTE(review): hard-coded local Windows paths — parameterize if shared.
daily_data <- fread( "D:\\Data\\daily_250_10_years.csv",header = T) %>%
tibble()
daily_data_2010 <- fread( "D:\\Data\\daily_250_2010_years.csv",header = T) %>%
tibble()
## My random 5 companies
# Reproducibly draw 5 companies from the first sample window.
set.seed(10)
selected_companies <- daily_data %>%
  distinct(CUSIP) %>%
  sample_n(5)
# Shared preparation for both sample windows (the original duplicated
# this pipeline verbatim for daily_data and daily_data_2010).
#
# Adds a month bucket, splits daily stock and market returns into their
# positive and negative parts (used later for signed-beta estimation),
# drops rows with missing RET, and renames VWRETD to mkt_ret.
prepare_signed_returns <- function(raw_daily) {
  raw_daily %>%
    mutate(month = floor_date(DATE, unit = "month")) %>%
    mutate(ret_positive = ifelse(RET > 0, RET, 0),
           ret_negative = ifelse(RET < 0, RET, 0),
           mkt_ret_positive = ifelse(VWRETD > 0, VWRETD, 0),
           mkt_ret_negative = ifelse(VWRETD < 0, VWRETD, 0)) %>%
    drop_na(RET) %>%
    rename(mkt_ret = VWRETD)
}
daily_data <- prepare_signed_returns(daily_data)
daily_data_2010 <- prepare_signed_returns(daily_data_2010)
# Reproducibly draw 5 companies from the 2010 sample window.
set.seed(10)
selected_companies_2010 <- daily_data_2010 %>%
  distinct(CUSIP) %>%
  sample_n(5)
# Component beta: cross-product of the stock returns with the (possibly
# sign-truncated) market returns, normalised by the uncentred market
# second moment sum(market_return^2). For plain numeric vectors
# sum(a * b) is exactly pracma::dot(a, b).
calculate_beta <- function(stock_returns, market_signed_returns, market_return) {
  sum(stock_returns * market_signed_returns) / sum(market_return^2)
}
# Co-skewness of a stock with the market:
#   sum(r_i * r_m^2) / (sqrt(sum(r_i^2) / total) * sum(r_m^2)).
# A zero-variance stock series would divide by zero, so return 0 early.
co_skewness <- function(stock_return, market_return, total) {
  stock_ss <- sum(stock_return^2)
  if (stock_ss == 0) {
    return(0)
  }
  sum(stock_return * market_return^2) /
    (sqrt(stock_ss / total) * sum(market_return^2))
}
# Co-kurtosis of a stock with the market:
#   sum(r_i * r_m^3) / (sqrt(sum(r_i^2) / total) * sum(r_m^2)^1.5).
# A zero-variance stock series would divide by zero, so return 0 early.
co_kurtosis <- function(stock_return, market_return, total) {
  stock_ss <- sum(stock_return^2)
  if (stock_ss == 0) {
    return(0)
  }
  sum(stock_return * market_return^3) /
    (sqrt(stock_ss / total) * sum(market_return^2)^1.5)
}
# Monthly per-company betas and higher co-moments.
# The signed-component betas decompose the conventional beta using the
# positive/negative parts of the stock and market returns; down_beta /
# up_beta condition on negative/positive market days only. Inside
# summarise(), `n` is computed first and then reused by the
# co_skewness/co_kurtosis calls on later lines.
monthly_betas <- daily_data %>%
group_by(month, CUSIP) %>%
summarise(n = n(),
beta_n = calculate_beta(ret_negative,mkt_ret_negative,mkt_ret),
beta_p = calculate_beta(ret_positive,mkt_ret_positive,mkt_ret),
beta_mn = calculate_beta(ret_positive,mkt_ret_negative,mkt_ret),
beta_mp = calculate_beta(ret_negative,mkt_ret_positive,mkt_ret),
beta_c = calculate_beta(RET,mkt_ret,mkt_ret),
down_beta = calculate_beta(RET,mkt_ret_negative,mkt_ret_negative),
up_beta = calculate_beta(RET,mkt_ret_positive,mkt_ret_positive),
coskewness = co_skewness(RET,mkt_ret,n),
cokurtosis = co_kurtosis(RET,mkt_ret,n)) %>%
mutate(beta = beta_n + beta_p + beta_mn + beta_mp,
check = beta-beta_c) ## sanity check: the four signed-component betas should sum to the conventional beta, so `check` is approximately 0 on every row if the decomposition is correct
## `summarise()` has grouped output by 'month'. You can override using the `.groups` argument.
#beta_check = calculate_beta())
# Historical 1% quantile of the return series — the cut-off used as the
# daily VaR level in the rolling windows below.
# NOTE(review): despite the name, this is the 1st percentile (lower
# tail), not a "top 5" — confirm the intended naming.
returnTop5 <- function(returns) {
  quantile(returns, probs = 0.01, na.rm = TRUE)
}
# Expected shortfall at the 1% level: the mean of all returns strictly
# below the 1% VaR cut-off.
expected_shortfall <- function(returns) {
  cutoff <- quantile(returns, probs = 0.01, na.rm = TRUE)
  mean(returns[returns < cutoff], na.rm = TRUE)
}
# Historical 99th percentile of the return series (upper-tail
# counterpart of returnTop5).
# Fix: added na.rm = TRUE for consistency with returnTop5 and
# expected_shortfall — previously a single NA made quantile() error.
returnTop1 <- function(returns) {
  quantile(returns, probs=c(0.99), na.rm = TRUE)
}
# Equal-weighted daily portfolio for the first window: the mean return
# across companies per day, a 100-day rolling 1% VaR and expected
# shortfall (via tibbletime::rollify), and the value path of an initial
# $250M investment. The first 99 rolling values are NA by construction.
daily_portfolio <- daily_data %>%
arrange(DATE) %>%
group_by(DATE) %>%
summarise(n = n(),portfolio_returns = mean(RET,na.rm=TRUE)) %>%
ungroup() %>%
mutate(
VaR_5 = rollify(returnTop5,100)(portfolio_returns),
portfolio_value = 250000000*cumprod(1+portfolio_returns),
cumulative_return = cumprod(1+portfolio_returns),
VaR_value = VaR_5*portfolio_value,
portfolio_shortfall = rollify(expected_shortfall,100)(portfolio_returns))
daily_portfolio
## # A tibble: 2,781 x 8
## DATE n portfolio_returns VaR_5 portfolio_value cumulative_return
## <date> <int> <dbl> <dbl> <dbl> <dbl>
## 1 1988-01-04 109 0.0409 NA 260213773. 1.04
## 2 1988-01-05 109 0.0188 NA 265113617. 1.06
## 3 1988-01-06 109 0.00923 NA 267561788. 1.07
## 4 1988-01-07 109 0.00820 NA 269754953. 1.08
## 5 1988-01-08 109 -0.0213 NA 264010249. 1.06
## 6 1988-01-11 109 -0.0119 NA 260867546. 1.04
## 7 1988-01-12 109 -0.0134 NA 257377531. 1.03
## 8 1988-01-13 109 0.00614 NA 258956807. 1.04
## 9 1988-01-14 110 -0.000793 NA 258751456. 1.04
## 10 1988-01-15 110 0.0183 NA 263491997. 1.05
## # ... with 2,771 more rows, and 2 more variables: VaR_value <dbl>,
## # portfolio_shortfall <dbl>
## Initial Values will be NA because of rolling
# Same equal-weighted portfolio construction as daily_portfolio above,
# applied to the 2010 sample window: daily mean return, 100-day rolling
# 1% VaR / expected shortfall, and the $250M value path.
daily_portfolio_2010 <- daily_data_2010 %>%
arrange(DATE) %>%
group_by(DATE) %>%
summarise(n = n(),portfolio_returns = mean(RET,na.rm=TRUE)) %>%
ungroup() %>%
mutate(
VaR_5 = rollify(returnTop5,100)(portfolio_returns),
portfolio_value = 250000000*cumprod(1+portfolio_returns),
cumulative_return = cumprod(1+portfolio_returns),
VaR_value = VaR_5*portfolio_value,
portfolio_shortfall = rollify(expected_shortfall,100)(portfolio_returns))
daily_portfolio_2010
## # A tibble: 2,767 x 8
## DATE n portfolio_returns VaR_5 portfolio_value cumulative_return
## <date> <int> <dbl> <dbl> <dbl> <dbl>
## 1 2000-01-03 124 -0.00323 NA 249192631. 0.997
## 2 2000-01-04 124 -0.0106 NA 246549159. 0.986
## 3 2000-01-05 124 0.00743 NA 248379942. 0.994
## 4 2000-01-06 124 0.0132 NA 251663202. 1.01
## 5 2000-01-07 124 0.0168 NA 255885626. 1.02
## 6 2000-01-10 124 0.00941 NA 258292686. 1.03
## 7 2000-01-11 124 -0.00694 NA 256499639. 1.03
## 8 2000-01-12 124 -0.00437 NA 255378713. 1.02
## 9 2000-01-13 124 0.00678 NA 257111210. 1.03
## 10 2000-01-14 124 0.00349 NA 258008970. 1.03
## # ... with 2,757 more rows, and 2 more variables: VaR_value <dbl>,
## # portfolio_shortfall <dbl>
## Initial Values will be NA because of rolling
# mod_fit[0]
#
# coef <- coef(mod_fit)
# coef[3]
# coef <- coef %>% tibble()
# val <- 2
# val
# garchvol <- sigma(mod_fit) %>% tibble()
# garchvol
# spec <- getspec(mod_fit)
# setfixed(spec) <- as.list(coef(mod_fit))
# garchforecast1 <- ugarchforecast(spec, n.ahead = 1, n.roll = 1,data = daily_portfolio$portfolio_returns, out.sample = 2)
#
# garchforecast1
# Rolling-window helper: returns the sample VARIANCE (sigma^2) of the
# returns, not the standard deviation, despite the name. The RiskMetrics
# and GARCH recursions below consume it as a variance, so usage is
# internally consistent.
# NOTE(review): downstream code annualizes the resulting quantity with
# *sqrt(252), which is the scaling for a standard deviation, not a
# variance — confirm the intended units.
volatility <- function(returns) {
var(returns, na.rm=TRUE)
}
# Restrict a long (DATE, CUSIP, vol_predicted) panel to the sampled
# companies and plot one series per company via plot_data_var (wide
# layout, one column per CUSIP).
#
# daily_predicted_vol: tibble with DATE, CUSIP, vol_predicted.
# selected_companies:  tibble with a CUSIP column of companies to keep.
plotForSelectedCompanies <- function(daily_predicted_vol, selected_companies) {
  daily_predicted_vol %>%
    # Fix: explicit join key — silences the "Joining, by" message and
    # guards against accidentally joining on extra shared columns.
    inner_join(selected_companies, by = "CUSIP") %>%
    pivot_wider(names_from = CUSIP, values_from = vol_predicted) %>%
    plot_data_var("Annualized Volatilities")
}
# RiskMetrics-style EWMA variance prediction per company (lambda = 0.94):
#   sigma2_t = 0.94 * sigma2_{t-1} + 0.06 * r_{t-1}^2
# Companies with <= 100 observations are dropped (the 100-day rolling
# window would be all-NA for them).
# NOTE(review): the recursion is applied to a 100-day rolling variance
# rather than to the previous EWMA value, and the result is scaled by
# sqrt(252) although it is variance-based — confirm both choices.
risk_metric_vols <- daily_data %>%
arrange(DATE) %>%
group_by(CUSIP) %>%
filter(n()>100) %>%
mutate(VaR_5 = rollify(returnTop5,100)(RET),
curr_vol = rollify(volatility,100)(RET),
vol_predicted_risk_metrics = lag(curr_vol)*0.94 + 0.06*lag(RET)*lag(RET),
vol_predicted_risk_metrics = vol_predicted_risk_metrics*sqrt(252)) %>% ## Converting to annualized volatility
ungroup() %>%
select(c(DATE,CUSIP,vol_predicted_risk_metrics)) %>%
rename(vol_predicted = vol_predicted_risk_metrics)
plotForSelectedCompanies(risk_metric_vols, selected_companies)
## Joining, by = "CUSIP"
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plotly.com/r/reference/#scatter
## Warning: Ignoring 100 observations
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plotly.com/r/reference/#scatter
## Warning: Ignoring 2324 observations
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plotly.com/r/reference/#scatter
## Warning: Ignoring 1709 observations
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plotly.com/r/reference/#scatter
## Warning: Ignoring 2218 observations
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plotly.com/r/reference/#scatter
## Warning: Ignoring 2698 observations
# Same RiskMetrics-style EWMA variance prediction (lambda = 0.94) as
# risk_metric_vols above, applied to the 2010 sample window.
risk_metric_vols_2010 <- daily_data_2010 %>%
arrange(DATE) %>%
group_by(CUSIP) %>%
filter(n()>100) %>%
mutate(VaR_5 = rollify(returnTop5,100)(RET),
curr_vol = rollify(volatility,100)(RET),
vol_predicted_risk_metrics = lag(curr_vol)*0.94 + 0.06*lag(RET)*lag(RET),
vol_predicted_risk_metrics = vol_predicted_risk_metrics*sqrt(252)) %>% ## Converting to annualized volatility
ungroup() %>%
select(c(DATE,CUSIP,vol_predicted_risk_metrics)) %>%
rename(vol_predicted = vol_predicted_risk_metrics)
plotForSelectedCompanies(risk_metric_vols_2010, selected_companies_2010)
## Joining, by = "CUSIP"
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plotly.com/r/reference/#scatter
## Warning: Ignoring 825 observations
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plotly.com/r/reference/#scatter
## Warning: Ignoring 100 observations
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plotly.com/r/reference/#scatter
## Warning: Ignoring 315 observations
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plotly.com/r/reference/#scatter
## Warning: Ignoring 2001 observations
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plotly.com/r/reference/#scatter
## Warning: Ignoring 1503 observations
# Fit an ARMA(1,0)-sGARCH(1,1) model with normal innovations to a
# return series, holding out the last 20 observations, and return the
# fitted coefficient vector (mu, ar1, omega, alpha1, beta1).
# Shared by the three accessors below — the original fitted the same
# model three times, once per coefficient.
fit_sgarch_coefficients <- function(returns) {
  mod_specify <- ugarchspec(
    mean.model = list(armaOrder = c(1, 0)),
    variance.model = list(model = "sGARCH", garchOrder = c(1, 1)),
    distribution.model = "norm"
  )
  mod_fit <- ugarchfit(data = returns, spec = mod_specify, out.sample = 20)
  coef(mod_fit)
}

# Fitted GARCH intercept omega (was coef[3]; name-based indexing is
# robust to coefficient ordering).
garch_omega <- function(returns) {
  as.list(fit_sgarch_coefficients(returns)["omega"])
}

# Fitted ARCH coefficient alpha1 (was coef[4]).
garch_alpha <- function(returns) {
  as.list(fit_sgarch_coefficients(returns)["alpha1"])
}

# Fitted GARCH coefficient beta1 (was coef[5]).
garch_beta <- function(returns) {
  as.list(fit_sgarch_coefficients(returns)["beta1"])
}
# na_count <-sapply(daily_data_var, function(y) sum(length(which(is.na(y)))))
# na_count
## Fitting the GARCH model per company and using its coefficients to predict volatility
# Fit per-company ARMA(1,0)-sGARCH(1,1) coefficients and build
# volatility predictions from them:
#   pred_t = omega1 + alpha1 * lag(rolling_var) + beta1 * lag(RET)^2
# applied to a 100-day rolling variance, then scaled by sqrt(252).
# Returns a tibble (grouped by CUSIP) with DATE, CUSIP, vol_predicted.
#
# Fixes: the result is now returned directly instead of being bound to
# a local variable (whose invisible value the caller relied on), and
# the join key is explicit.
# NOTE(review): alpha1 multiplies the lagged variance and beta1 the
# lagged squared return — standard GARCH notation is the reverse
# (coef[4] = alpha1 on r^2, coef[5] = beta1 on sigma^2); confirm the
# intended mapping.
fitGARCHAndPredict <- function(daily_data) {
  # Per-company coefficients; require > 120 non-missing returns so the
  # fit (which holds out 20 observations) has enough data.
  fitted_garch_model <- daily_data %>%
    arrange(DATE) %>%
    group_by(CUSIP) %>%
    filter(sum(!is.na(RET)) > 120) %>%
    summarise(alpha1 = garch_alpha(RET),
              beta1 = garch_beta(RET),
              omega1 = garch_omega(RET))
  # Join the coefficients back onto the daily panel and predict.
  daily_data %>%
    inner_join(fitted_garch_model, by = "CUSIP") %>%
    arrange(DATE) %>%
    group_by(CUSIP) %>%
    drop_na() %>%
    mutate(alpha1 = as.numeric(alpha1),
           beta1 = as.numeric(beta1),
           omega1 = as.numeric(omega1),
           curr_vol = rollify(volatility, 100)(RET),
           vol_predicted = omega1 + alpha1*lag(curr_vol) + beta1*lag(RET)*lag(RET),
           vol_predicted = vol_predicted*sqrt(252)) %>% ## Converting to annualized volatility
    select(c(DATE, CUSIP, vol_predicted))
}
# Fit GARCH models and predict volatilities for the first sample window.
daily_predicted_vol <- fitGARCHAndPredict(daily_data)
## Warning in .sgarchfit(spec = spec, data = data, out.sample = out.sample, :
## ugarchfit-->warning: solver failer to converge.
## Warning in .sgarchfit(spec = spec, data = data, out.sample = out.sample, :
## ugarchfit-->warning: solver failer to converge.
## Warning in .sgarchfit(spec = spec, data = data, out.sample = out.sample, :
## ugarchfit-->warning: solver failer to converge.
## Warning in .sgarchfit(spec = spec, data = data, out.sample = out.sample, :
## ugarchfit-->warning: solver failer to converge.
## Warning in .sgarchfit(spec = spec, data = data, out.sample = out.sample, :
## ugarchfit-->warning: solver failer to converge.
## Warning in .sgarchfit(spec = spec, data = data, out.sample = out.sample, :
## ugarchfit-->warning: solver failer to converge.
## Warning in .sgarchfit(spec = spec, data = data, out.sample = out.sample, :
## ugarchfit-->warning: solver failer to converge.
## Warning in .sgarchfit(spec = spec, data = data, out.sample = out.sample, :
## ugarchfit-->warning: solver failer to converge.
## Warning in .sgarchfit(spec = spec, data = data, out.sample = out.sample, :
## ugarchfit-->warning: solver failer to converge.
## Warning in .sgarchfit(spec = spec, data = data, out.sample = out.sample, :
## ugarchfit-->warning: solver failer to converge.
## Warning in .sgarchfit(spec = spec, data = data, out.sample = out.sample, :
## ugarchfit-->warning: solver failer to converge.
## Warning in .sgarchfit(spec = spec, data = data, out.sample = out.sample, :
## ugarchfit-->warning: solver failer to converge.
## Warning in .sgarchfit(spec = spec, data = data, out.sample = out.sample, :
## ugarchfit-->warning: solver failer to converge.
## Warning in .sgarchfit(spec = spec, data = data, out.sample = out.sample, :
## ugarchfit-->warning: solver failer to converge.
## Warning in .sgarchfit(spec = spec, data = data, out.sample = out.sample, :
## ugarchfit-->warning: solver failer to converge.
## `summarise()` has grouped output by 'CUSIP'. You can override using the `.groups` argument.
## Joining, by = "CUSIP"
# Fit GARCH models and predict volatilities for the 2010 sample window.
daily_predicted_vol_2010 <- fitGARCHAndPredict(daily_data_2010)
## Warning in .sgarchfit(spec = spec, data = data, out.sample = out.sample, :
## ugarchfit-->warning: solver failer to converge.
## Warning in .sgarchfit(spec = spec, data = data, out.sample = out.sample, :
## ugarchfit-->warning: solver failer to converge.
## Warning in .sgarchfit(spec = spec, data = data, out.sample = out.sample, :
## ugarchfit-->warning: solver failer to converge.
## `summarise()` has grouped output by 'CUSIP'. You can override using the `.groups` argument.
## Joining, by = "CUSIP"
# GARCH-predicted annualized volatilities for the 5 sampled companies.
plotForSelectedCompanies(daily_predicted_vol, selected_companies)
## Joining, by = "CUSIP"
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plotly.com/r/reference/#scatter
## Warning: Ignoring 100 observations
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plotly.com/r/reference/#scatter
## Warning: Ignoring 2324 observations
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plotly.com/r/reference/#scatter
## Warning: Ignoring 1709 observations
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plotly.com/r/reference/#scatter
## Warning: Ignoring 2218 observations
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plotly.com/r/reference/#scatter
## Warning: Ignoring 2698 observations
# GARCH-predicted annualized volatilities, 2010 window sample.
plotForSelectedCompanies(daily_predicted_vol_2010, selected_companies_2010)
## Joining, by = "CUSIP"
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plotly.com/r/reference/#scatter
## Warning: Ignoring 100 observations
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plotly.com/r/reference/#scatter
## Warning: Ignoring 315 observations
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plotly.com/r/reference/#scatter
## Warning: Ignoring 2001 observations
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plotly.com/r/reference/#scatter
## Warning: Ignoring 1503 observations